from draw_relevance import read_excel
import jieba
from collections import Counter


def get_keywords(all_titles, top_n=21):
    """Return the most frequent multi-character words found in the titles.

    Every title is segmented with jieba in full mode (``cut_all=True``).
    Segments of length one or zero are dropped before counting.

    Args:
        all_titles: Nested mapping whose outer values are mappings (one per
            year) and whose inner values are iterables of title strings
            (one per area).
        top_n: Maximum number of entries to return. Defaults to 21, the
            value that used to be hard-coded. ``None`` returns every
            counted word.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least
        frequent; empty when no title yields a qualifying word.
    """
    word_count = Counter()
    for year_titles in all_titles.values():
        for area_titles in year_titles.values():
            for title in area_titles:
                # Feed the counter directly instead of accumulating one
                # large intermediate list of every word.
                word_count.update(
                    word
                    for word in jieba.cut(title, cut_all=True)
                    if len(word) > 1
                )
    return word_count.most_common(top_n)


def main():
    """Read the titles workbook, then print its top keywords.

    Two lines are printed: first the (keyword, count) pairs returned by
    ``get_keywords``, then the list of keywords alone.
    """
    workbook_path = './input/副本九届职教成果奖汇总.xlsx'

    titles_by_year = read_excel(workbook_path)
    ranked = get_keywords(titles_by_year)
    print(ranked)
    # Keep only the word (first element) of each ranked entry.
    print([entry[0] for entry in ranked])



# Run the keyword extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
